Zhou, Y., Song, W.M., Andhey, P.S., Swain, A., Levy, T., Miller, K.R., Poliani, P.L., Cominelli, M., Grover, S., Gilfillan, S., et al. (2020). Human and mouse single-nucleus transcriptomics reveal TREM2-dependent and TREM2-independent cellular responses in Alzheimer’s disease. Nat. Med. 26, 131–142.



Load required packages.

library(tidyverse)
library(magrittr)
library(Matrix)
library(extrafont)
library(ggrepel)
library(patchwork)
# library(tidylog)
Sys.time()
## [1] "2020-12-16 01:26:11 CST"

Data preparation

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

load_matrix <- function(x) {
    matrix_readcount_use <- scipy.sparse$load_npz(
        file.path(x, "matrix_readcount.npz")
    )

    colnames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_barcodes.npy")
    )

    rownames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_features.npy")
    )

    return(matrix_readcount_use)
}

Data loading

PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Collaborations/Microglia/raw/public/PRJNA590042/remapped"

Matrix

np <- reticulate::import("numpy", convert = TRUE)
scipy.sparse <- reticulate::import(module = "scipy.sparse", convert = TRUE)

matrix_dir <- list(
    "matrices"
)

matrix_readcount_use <- purrr::map(matrix_dir, function(x) {
    load_matrix(file.path(PROJECT_DIR, x))
}) %>%
    purrr::reduce(cbind)

walk(list(matrix_readcount_use, matrix_dir), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
## 3.4 GB
## 176 B
# clean up
rm(matrix_dir)

Embedding

EMBEDDING_FILE <- "embedding_ncomponents35_ccc1_seed20200416.csv.gz"

embedding <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "clustering",
        "exploring",
        EMBEDDING_FILE
    )
)

# clean up
rm(EMBEDDING_FILE)

Metadata

cell_metadata_PRJNA590042 <- read_delim(
    file.path(
        PROJECT_DIR,
        "raw/",
        "SraRunTable.csv"
    ),
    delim = ","
) %>%
    select(
        run = Run,
        age = Age,
        sample_name = `Sample Name`,
        mouse_genotype,
        source_name
    )

embedding %<>%
    left_join(
        cell_metadata_PRJNA590042 %>%
            select(sample_name, genotype = mouse_genotype),
        by = c("batch" = "sample_name")
    ) %>%
    mutate(
        genotype = factor(
            genotype,
            levels = c("wt", "5XFAD", "Trem2-/-", "Trem2-/- 5XFAD")
        ) %>% fct_recode(WT = "wt")
    )

cell_metadata_PRJNA590042
## # A tibble: 12 x 5
##    run      age     sample_name mouse_genotype source_name                      
##    <chr>    <chr>   <chr>       <chr>          <chr>                            
##  1 SRR1048… 7 mont… GSM4173504  wt             Isolated single nuclei from brai…
##  2 SRR1048… 7 mont… GSM4173505  wt             Isolated single nuclei from brai…
##  3 SRR1048… 7 mont… GSM4173506  wt             Isolated single nuclei from brai…
##  4 SRR1048… 7 mont… GSM4173507  Trem2-/-       Isolated single nuclei from brai…
##  5 SRR1048… 7 mont… GSM4173508  Trem2-/-       Isolated single nuclei from brai…
##  6 SRR1048… 7 mont… GSM4173509  Trem2-/-       Isolated single nuclei from brai…
##  7 SRR1048… 7 mont… GSM4173510  5XFAD          Isolated single nuclei from brai…
##  8 SRR1048… 7 mont… GSM4173511  5XFAD          Isolated single nuclei from brai…
##  9 SRR1048… 7 mont… GSM4173512  5XFAD          Isolated single nuclei from brai…
## 10 SRR1048… 7 mont… GSM4173513  Trem2-/- 5XFAD Isolated single nuclei from brai…
## 11 SRR1048… 7 mont… GSM4173514  Trem2-/- 5XFAD Isolated single nuclei from brai…
## 12 SRR1048… 7 mont… GSM4173515  Trem2-/- 5XFAD Isolated single nuclei from brai…

Distinguishing major brain-cell types

Summary

embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(louvain) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` has ungrouped output. You can override using the `.groups` argument.
louvain num_cells median_umis median_genes
0 8953 2509.0 1439.0
1 7856 14239.0 4261.0
2 6987 13770.0 4518.0
3 6372 11030.0 3962.5
4 6252 13780.0 4370.0
5 5856 701.5 547.0
6 5070 18558.5 5025.0
7 5029 16350.0 4841.0
8 4479 3236.0 1718.0
9 3972 731.0 538.0
10 3906 4403.0 2246.0
11 3696 2704.0 1721.0
12 2964 11285.0 4214.5
13 2484 12135.5 4364.5
14 2316 8534.5 3514.5
15 2215 5280.0 2586.0
16 1752 23367.5 5638.0
17 1740 10955.5 4105.5
18 1610 6057.5 2650.0
19 996 7048.0 3065.0
20 906 10770.5 3921.5
21 814 5551.0 2565.0
22 682 16883.5 4925.0
23 584 1835.0 1225.5
24 537 884.0 640.0
25 528 2872.0 1742.0
26 474 3503.0 2050.5
27 312 4221.5 2078.0
28 238 2310.5 1524.5
29 134 13254.0 4724.0
30 112 5206.0 2697.0
31 47 9194.0 3847.0


embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(genotype) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` has ungrouped output. You can override using the `.groups` argument.
genotype num_cells median_umis median_genes
WT 24034 7677.0 3313.5
5XFAD 22069 5824.0 2633.0
Trem2-/- 22884 7802.0 3325.0
Trem2-/- 5XFAD 20886 8696.5 3472.5


purrr::map(sort(unique(embedding$louvain)), function(x) {
    cells_in_group <- embedding %>%
        filter(louvain == x) %>%
        pull(cell)

    colSums(matrix_readcount_use[, cells_in_group]) %>%
        enframe(name = "cell") %>%
        mutate(group = x)
}) %>%
    dplyr::bind_rows() %>%
    mutate(
        group = factor(
            group,
            levels = sort(unique(embedding$louvain)) %>% rev()
        ),
        category = "UMI distribution"
    ) %>%
    plot_violin_umi(
        x = value,
        y = group,
        z = "category"
    ) +
    # match cluster colors
    ggplot2::scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    ) +
    ggplot2::scale_fill_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    )

Embedding visualization

EMBEDDING_TITLE_PREFIX <- "t-SNE"

x_column <- "x_tsne"
y_column <- "y_tsne"


Clustering & UMI & batch & genotype

p_embedding_cluster <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$louvain %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.3,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain)))
    ) +
    # labs(color = NULL) +
    customized_theme()

p_embedding_batch <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.15,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.83, 0.275)
p_embedding_umi <- plot_embedding_value(
    embedding = embedding[, c(x_column, y_column)],
    color_values = log10(
        Matrix::colSums(matrix_readcount_use[, embedding$cell]) + 1
    ),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.4,
    sort_values = TRUE,
    FUN = function(x) x
)

p_embedding_genotype <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$genotype,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Genotype"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.15,
    sort_values = FALSE
) +
    scale_color_manual(
        # values = gg_color_hue(n = length(unique(embedding$genotype)))
        values = yarrr::piratepal(palette = "google") %>% as.character()
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()


Fig.1b

list(
    p_embedding_cluster,
    p_embedding_umi,
    p_embedding_genotype,
    p_embedding_batch
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Batch; separated

purrr::map(sort(unique(embedding$batch)), function(x) {
    plot_embedding(
        embedding = embedding[, c(x_column, y_column)],
        color_values = as.numeric(embedding$batch == x) %>% as.factor(),
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = FALSE,
        geom_point_size = 0.18,
        sort_values = TRUE
    ) +
        scale_color_manual(
            values = c("grey70", "salmon")
        )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 3) +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

matrix_cpm_use <- calc_cpm(matrix_readcount_use)

walk(list(matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})

# clean up
gc()
## 3.4 GB
##             used   (Mb) gc trigger    (Mb) limit (Mb)   max used    (Mb)
## Ncells   3194034  170.6    6103355   326.0         NA    4300675   229.7
## Vcells 728772407 5560.1 1491146679 11376.6      16384 1464667485 11174.6

Embedding

FEATURES_SELECTED <- c(
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000036887_C1qa"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(nrow = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

grep(pattern = "itm2a", x = rownames(matrix_cpm_use), ignore.case = TRUE, value = TRUE)

FEATURES_SELECTED <- c(
    # Pan-neuronal
    "ENSMUSG00000026959_Grin1",
    "ENSMUSG00000025576_Rbfox3",
    # Excitatory neuron
    "ENSMUSG00000070570_Slc17a7",
    "ENSMUSG00000038331_Satb2",
    # Astrocyte
    "ENSMUSG00000024411_Aqp4",
    "ENSMUSG00000050953_Gja1",
    # Oligodendrocyte
    "ENSMUSG00000076439_Mog",
    "ENSMUSG00000037625_Cldn11",
    # OPC
    "ENSMUSG00000029231_Pdgfra",
    "ENSMUSG00000021614_Vcan",
    # Endothelial cell
    "ENSMUSG00000017344_Vtn",
    "ENSMUSG00000031239_Itm2a"
)

FEATURES_SELECTED_ANNOTATION <- c(
    "Pan-neuronal", "Excitatory neuron", "Astrocyte",
    "Oligodendrocyte", "OPC", "Endothelial cell"
) %>%
    rep(each = 2)
## [1] "ENSMUSG00000031239_Itm2a"


ED Fig.1b

# cell type specific markers
purrr::map2(FEATURES_SELECTED, FEATURES_SELECTED_ANNOTATION, function(x, y) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste(
            paste0(EMBEDDING_TITLE_PREFIX, "; ", x %>% str_remove("^E.+_")),
            y,
            sep = "; "
        ),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))


ED Fig.1b

# Inhibitory neuron
FEATURES_SELECTED <- c(
    "ENSMUSG00000026837_Col5a1",
    "ENSMUSG00000070880_Gad1",
    "ENSMUSG00000061762_Tac1",
    "ENSMUSG00000004366_Sst",
    "ENSMUSG00000041592_Sdk2",
    "ENSMUSG00000026787_Gad2",
    "ENSMUSG00000045573_Penk",
    "ENSMUSG00000029819_Npy"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste(
            EMBEDDING_TITLE_PREFIX,
            x %>% str_remove("^E.+_"),
            "Inhibitory neuron",
            sep = "; "
        ),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

Lollipop

clusters_selected_lollipop <- sort(unique(embedding$louvain))
cells_selected_lollipop <- lapply(clusters_selected_lollipop, function(x) {
    embedding$cell[embedding$louvain == x]
})
names(cells_selected_lollipop) <- clusters_selected_lollipop

FEATURES_LOLLIPOP <- c(
    "ENSMUSG00000026959_Grin1",
    "ENSMUSG00000035864_Syt1",
    "ENSMUSG00000025576_Rbfox3",
    "ENSMUSG00000027273_Snap25",
    "ENSMUSG00000059003_Grin2a",
    "ENSMUSG00000070570_Slc17a7",
    "ENSMUSG00000038331_Satb2",
    "ENSMUSG00000070880_Gad1",
    "ENSMUSG00000026787_Gad2",
    "ENSMUSG00000004366_Sst",
    "ENSMUSG00000029819_Npy",
    "ENSMUSG00000031425_Plp1",
    "ENSMUSG00000041607_Mbp",
    "ENSMUSG00000037625_Cldn11",
    "ENSMUSG00000076439_Mog",
    "ENSMUSG00000024617_Camk2a",
    "ENSMUSG00000032532_Cck",
    "ENSMUSG00000033161_Atp1a1",
    "ENSMUSG00000023868_Pde10a",
    "ENSMUSG00000061762_Tac1",
    "ENSMUSG00000045573_Penk",
    "ENSMUSG00000041592_Sdk2",
    "ENSMUSG00000026837_Col5a1",
    "ENSMUSG00000025582_Nptx1",
    "ENSMUSG00000059173_Pde1a",
    "ENSMUSG00000036617_Etl4",
    "ENSMUSG00000005089_Slc1a2",
    "ENSMUSG00000050953_Gja1",
    "ENSMUSG00000024411_Aqp4",
    "ENSMUSG00000021665_Hexb",
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000036887_C1qa",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000029231_Pdgfra",
    "ENSMUSG00000021614_Vcan",
    "ENSMUSG00000032911_Cspg4",
    "ENSMUSG00000046160_Olig1",
    "ENSMUSG00000017344_Vtn",
    "ENSMUSG00000029648_Flt1",
    "ENSMUSG00000041378_Cldn5"
)

p_dotplot_features_selected <- plot_lollipop(
    cells = cells_selected_lollipop,
    features = FEATURES_LOLLIPOP,
    matrix_cpm = matrix_cpm_use
)
p_dotplot_features_selected

Violin

FEATURES_VIOLIN <- c(
    "ENSMUSG00000005533_Igf1r",
    "ENSMUSG00000022265_Ank",
    "ENSMUSG00000002985_Apoe",
    "ENSMUSG00000029207_Apbb2",
    "ENSMUSG00000021109_Hif1a",
    "ENSMUSG00000007891_Ctsd",
    "ENSMUSG00000023992_Trem2"
)

plot_violin(
    cells = cells_selected_lollipop,
    features = FEATURES_VIOLIN,
    matrix_cpm = matrix_cpm_use,
    x_range_breaks = NULL
) +
    # match cluster colors
    ggplot2::scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    ) +
    ggplot2::scale_fill_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    )

Cluster composition

p_barplot_cluster_composition_batch <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    )


Fig.1e

p_barplot_cluster_composition_batch

p_barplot_cluster_composition_genotype <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = genotype
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = genotype
    ) +
    scale_fill_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character()
    )
p_barplot_cluster_composition_genotype

Subclustering microglia

embedding_microglia <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "clustering",
        "subclustering",
        "exploring",
        "embedding_ncomponents8_ccc1_seed20200416.csv.gz"
    )
) %>%
    left_join(
        cell_metadata_PRJNA590042 %>%
            select(sample_name, genotype = mouse_genotype),
        by = c("batch" = "sample_name")
    ) %>%
    mutate(
        genotype = factor(
            genotype,
            levels = c("wt", "5XFAD", "Trem2-/-", "Trem2-/- 5XFAD")
        ) %>% fct_recode(WT = "wt")
    )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
matrix_readcount_use <- matrix_readcount_use[, embedding_microglia$cell]
matrix_cpm_use <- calc_cpm(matrix_readcount_use)

walk(list(matrix_readcount_use, matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
## 95.5 MB
## 95.5 MB

Embedding visualization

EMBEDDING_TITLE_PREFIX <- "t-SNE"

x_column <- "x_tsne"
y_column <- "y_tsne"


Clustering & UMI & batch & genotype

p_embedding_microglia_cluster <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$louvain %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.6,
    sort_values = FALSE
) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_microglia$louvain))
        )
    ) +
    # labs(color = NULL) +
    customized_theme()

p_embedding_microglia_batch <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.35,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_microglia$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.83, 0.275)
p_embedding_microglia_umi <- plot_embedding_value(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = log10(
        Matrix::colSums(matrix_readcount_use[, embedding_microglia$cell]) + 1
    ),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)

p_embedding_microglia_genotype <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$genotype,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Genotype"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.4,
    sort_values = FALSE
) +
    scale_color_manual(
        # values = gg_color_hue(n = length(unique(embedding$genotype)))
        values = yarrr::piratepal(palette = "google") %>% as.character()
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()


Fig.2e

list(
    p_embedding_microglia_cluster,
    p_embedding_microglia_umi,
    p_embedding_microglia_genotype,
    p_embedding_microglia_batch
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

Embedding


Fig.2g

FEATURES_SELECTED <- c(
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000036887_C1qa"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding_microglia[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding_microglia$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.6,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(nrow = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

Cluster composition

Batch

p_barplot_cluster_composition_batch <- prepare_cluster_composition(
    embedding = embedding_microglia,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    )
# colorspace::scale_fill_discrete_qualitative(palette = "Dark 3")
p_barplot_cluster_composition_batch

Genotype

p_barplot_cluster_composition_genotype <- prepare_cluster_composition(
    embedding = embedding_microglia,
    x = louvain,
    group = genotype
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = genotype
    ) +
    scale_fill_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character()
    )
p_barplot_cluster_composition_genotype

Genotype; reversed


Fig.2f

prepare_cluster_composition(
    embedding = embedding_microglia,
    x = genotype,
    group = louvain
) %>%
    arrange(louvain) %>%
    mutate(
        louvain = factor(louvain),
        genotype = fct_rev(genotype)
    ) %>%
    plot_barplot(x = genotype, y = percentage, z = louvain) +
    coord_flip() +
    scale_fill_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_microglia$louvain))
        )
    )

Heatmap

FEATURES_HEATMAP <- c(
    "ENSMUSG00000018126_Baiap2l2",
    "ENSMUSG00000068129_Cst7",
    "ENSMUSG00000014599_Csf1",
    "ENSMUSG00000015568_Lpl",
    # "ENSMUSG00000062593_Lilrb4a",
    "ENSMUSG00000030789_Itgax",
    "ENSMUSG00000005533_Igf1r",
    "ENSMUSG00000000982_Ccl3",
    "ENSMUSG00000050370_Ch25h",
    "ENSMUSG00000022265_Ank",
    "ENSMUSG00000002602_Axl",
    "ENSMUSG00000024610_Cd74",
    "ENSMUSG00000002985_Apoe",
    "ENSMUSG00000069516_Lyz2",
    "ENSMUSG00000029207_Apbb2",
    "ENSMUSG00000021109_Hif1a",
    "ENSMUSG00000018927_Ccl6",
    "ENSMUSG00000029816_Gpnmb",
    "ENSMUSG00000025351_Cd63",
    "ENSMUSG00000073411_H2-D1",
    "ENSMUSG00000007891_Ctsd",
    "ENSMUSG00000023992_Trem2",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000033192_Lpcat2",
    "ENSMUSG00000048163_Selplg",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000054675_Tmem119",
    "ENSMUSG00000079227_Ccr5",
    "ENSMUSG00000029343_Crybb1"
)

matrix_heatmap <- sapply(sort(unique(embedding_microglia$batch)), function(x) {
    cells_in_group <- embedding_microglia %>%
        filter(batch == x) %>%
        pull(cell)

    Matrix::rowMeans(matrix_cpm_use[FEATURES_HEATMAP, cells_in_group])
})

matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))
rownames(matrix_heatmap) <- str_remove(
    string = rownames(matrix_heatmap),
    pattern = "^E.+_"
)


Fig.2c

ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap,
    col = wesanderson::wes_palette("Zissou1", 100, type = "continuous"),
    rect_gp = grid::gpar(col = "white", lwd = 0.1),
    #
    cluster_rows = TRUE,
    cluster_columns = FALSE,
    #
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    #
    heatmap_legend_param = list(
        title = "Z score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(20, "mm"),
        legend_width = unit(8, "mm")
    )
    # heatmap_width = unit(8, "cm")
    # heatmap_height = unit(8, "cm")
)

R session info

devtools::session_info()$platform
##  setting  value                       
##  version  R version 4.0.3 (2020-10-10)
##  os       macOS  11.1                 
##  system   x86_64, darwin20.1.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2020-12-16
devtools::session_info()$pack %>%
    as_tibble() %>%
    dplyr::select(
        package,
        loadedversion,
        date,
        `source`
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
package loadedversion date source
assertthat 0.2.1 2019-03-21 CRAN (R 4.0.0)
backports 1.2.1 2020-12-09 CRAN (R 4.0.3)
BayesFactor 0.9.12-4.2 2018-05-19 CRAN (R 4.0.3)
BiocGenerics 0.36.0 2020-10-27 Bioconductor
broom 0.7.2.9000 2020-12-12 Github (tidymodels/broom@810115d)
Cairo 1.5-12.2 2020-07-07 CRAN (R 4.0.3)
callr 3.5.1.9000 2020-11-23 Github (r-lib/callr@d44e660)
cellranger 1.1.0 2016-07-27 CRAN (R 4.0.0)
checkmate 2.0.0 2020-02-06 CRAN (R 4.0.0)
circlize 0.4.11 2020-10-31 CRAN (R 4.0.3)
cli 2.2.0 2020-11-20 CRAN (R 4.0.3)
clue 0.3-58 2020-12-03 CRAN (R 4.0.3)
cluster 2.1.0 2019-06-19 CRAN (R 4.0.3)
coda 0.19-4 2020-09-30 CRAN (R 4.0.2)
codetools 0.2-18 2020-11-04 CRAN (R 4.0.3)
colorspace 2.0-0 2020-11-10 R-Forge (R 4.0.3)
ComplexHeatmap 2.6.2 2020-11-12 Bioconductor
crayon 1.3.4 2017-09-16 CRAN (R 4.0.0)
DBI 1.1.0 2019-12-15 CRAN (R 4.0.0)
dbplyr 2.0.0 2020-11-03 CRAN (R 4.0.3)
desc 1.2.0 2018-05-01 CRAN (R 4.0.0)
devtools 2.3.1.9000 2020-12-05 Github (r-lib/devtools@b3be019)
digest 0.6.27 2020-10-24 CRAN (R 4.0.3)
dplyr 1.0.2.9000 2020-12-14 Github (tidyverse/dplyr@59105eb)
ellipsis 0.3.1 2020-05-15 CRAN (R 4.0.3)
evaluate 0.14 2019-05-28 CRAN (R 4.0.0)
extrafont 0.17 2014-12-08 CRAN (R 4.0.2)
extrafontdb 1.0 2012-06-11 CRAN (R 4.0.0)
fansi 0.4.1 2020-01-08 CRAN (R 4.0.0)
farver 2.0.3 2020-01-16 CRAN (R 4.0.0)
forcats 0.5.0.9000 2020-12-09 Github (tidyverse/forcats@6c5e59f)
fs 1.5.0 2020-07-31 CRAN (R 4.0.3)
generics 0.1.0 2020-10-31 CRAN (R 4.0.3)
GetoptLong 1.0.5 2020-12-15 CRAN (R 4.0.3)
ggplot2 3.3.2.9000 2020-12-14 Github (tidyverse/ggplot2@9deb97b)
ggrepel 0.8.2 2020-03-08 CRAN (R 4.0.3)
ggthemes 4.2.0 2020-12-10 Github (jrnold/ggthemes@fa3d1ac)
GlobalOptions 0.1.2 2020-06-10 CRAN (R 4.0.0)
glue 1.4.2 2020-08-27 CRAN (R 4.0.2)
gt 0.2.2 2020-11-25 Github (rstudio/gt@bae32f4)
gtable 0.3.0 2019-03-25 CRAN (R 4.0.0)
gtools 3.8.2 2020-03-31 CRAN (R 4.0.0)
haven 2.3.1 2020-06-01 CRAN (R 4.0.0)
hms 0.5.3 2020-01-08 CRAN (R 4.0.0)
htmltools 0.5.0.9003 2020-12-03 Github (rstudio/htmltools@d18bd8e)
httr 1.4.2 2020-07-20 CRAN (R 4.0.2)
IRanges 2.24.1 2020-12-12 Bioconductor
jpeg 0.1-8.1 2019-10-24 CRAN (R 4.0.0)
jsonlite 1.7.2 2020-12-09 CRAN (R 4.0.3)
knitr 1.30.4 2020-12-15 Github (yihui/knitr@f8f90ba)
labeling 0.4.2 2020-10-20 CRAN (R 4.0.3)
lattice 0.20-41 2020-04-02 CRAN (R 4.0.3)
lifecycle 0.2.0 2020-03-06 CRAN (R 4.0.0)
lubridate 1.7.9.2 2020-12-12 Github (tidyverse/lubridate@04be2a8)
magick 2.5.2 2020-11-10 CRAN (R 4.0.3)
magrittr 2.0.1.9000 2020-12-14 Github (tidyverse/magrittr@bb1c86a)
Matrix 1.2-18 2019-11-27 CRAN (R 4.0.2)
MatrixModels 0.4-1 2015-08-22 CRAN (R 4.0.0)
matrixStats 0.57.0 2020-09-25 CRAN (R 4.0.2)
memoise 1.1.0 2017-04-21 CRAN (R 4.0.0)
modelr 0.1.8.9000 2020-11-23 Github (tidyverse/modelr@16168e0)
munsell 0.5.0 2018-06-12 CRAN (R 4.0.0)
mvtnorm 1.1-1 2020-06-09 CRAN (R 4.0.2)
patchwork 1.1.0 2020-11-09 CRAN (R 4.0.3)
pbapply 1.4-3 2020-08-18 CRAN (R 4.0.2)
pillar 1.4.7 2020-11-20 CRAN (R 4.0.3)
pkgbuild 1.1.0 2020-07-13 CRAN (R 4.0.3)
pkgconfig 2.0.3 2019-09-22 CRAN (R 4.0.0)
pkgload 1.1.0 2020-05-29 CRAN (R 4.0.0)
png 0.1-7 2013-12-03 CRAN (R 4.0.0)
prettydoc 0.4.0 2020-08-10 CRAN (R 4.0.3)
prettyunits 1.1.1.9000 2020-11-23 Github (r-lib/prettyunits@b1cdad8)
processx 3.4.5 2020-11-30 CRAN (R 4.0.3)
ps 1.5.0 2020-12-05 CRAN (R 4.0.3)
purrr 0.3.4.9000 2020-11-23 Github (tidyverse/purrr@af06d45)
R6 2.5.0 2020-11-02 Github (r-lib/R6@6cf7d4e)
rappdirs 0.3.1 2016-03-28 CRAN (R 4.0.0)
RColorBrewer 1.1-2 2014-12-07 CRAN (R 4.0.0)
Rcpp 1.0.5 2020-07-06 CRAN (R 4.0.3)
readr 1.4.0.9000 2020-11-23 Github (tidyverse/readr@97186a8)
readxl 1.3.1.9000 2020-11-23 Github (tidyverse/readxl@9f85fa5)
remotes 2.2.0.9000 2020-12-03 Github (r-lib/remotes@5d0d9fd)
reprex 0.3.0 2019-05-16 CRAN (R 4.0.0)
reticulate 1.18 2020-10-25 CRAN (R 4.0.3)
rjson 0.2.20 2018-06-08 CRAN (R 4.0.0)
rlang 0.4.9.9000 2020-12-16 Github (r-lib/rlang@b5f7d34)
rmarkdown 2.6.0001 2020-12-15 Github (rstudio/rmarkdown@80f14b2)
rprojroot 2.0.2 2020-11-15 CRAN (R 4.0.3)
rstudioapi 0.13 2020-11-12 CRAN (R 4.0.3)
Rttf2pt1 1.3.8 2020-01-10 CRAN (R 4.0.0)
rvest 0.3.6 2020-07-25 CRAN (R 4.0.2)
S4Vectors 0.28.1 2020-12-09 Bioconductor
sass 0.2.0.9005 2020-12-01 Github (rstudio/sass@1dda864)
scales 1.1.1 2020-05-11 CRAN (R 4.0.3)
sessioninfo 1.1.1 2018-11-05 CRAN (R 4.0.3)
shape 1.4.5 2020-09-13 CRAN (R 4.0.2)
stringi 1.5.3 2020-09-09 CRAN (R 4.0.2)
stringr 1.4.0.9000 2020-11-23 Github (tidyverse/stringr@1f03eb0)
styler 1.3.2.9000 2020-11-30 Github (r-lib/styler@d5b8b0e)
testthat 3.0.0.9000 2020-12-15 Github (r-lib/testthat@6227640)
tibble 3.0.4.9000 2020-12-13 Github (tidyverse/tibble@5881b43)
tidyr 1.1.2.9000 2020-12-09 Github (tidyverse/tidyr@581b488)
tidyselect 1.1.0 2020-05-11 CRAN (R 4.0.3)
tidyverse 1.3.0.9000 2020-11-23 Github (hadley/tidyverse@8a0bb99)
usethis 2.0.0.9000 2020-12-13 Github (r-lib/usethis@45d0fef)
utf8 1.1.4 2018-05-24 CRAN (R 4.0.0)
vctrs 0.3.5 2020-11-17 CRAN (R 4.0.3)
viridisLite 0.3.0 2018-02-01 CRAN (R 4.0.0)
wesanderson 0.3.6 2018-04-20 CRAN (R 4.0.3)
withr 2.3.0 2020-09-22 CRAN (R 4.0.2)
xfun 0.19 2020-10-30 CRAN (R 4.0.3)
xml2 1.3.2 2020-04-23 CRAN (R 4.0.0)
yaml 2.2.1 2020-02-01 CRAN (R 4.0.0)
yarrr 0.1.5 2017-04-19 CRAN (R 4.0.3)